#excess covid deaths per 100,000 (in 2021) for every 10% increase in poverty
kable(cbind(reg_ye_2021.beta,reg_ye_2021.lcl,reg_ye_2021.ucl,reg_ye_2021.p/10)[2,]*10)
| x | |
|---|---|
| reg_ye_2021.beta | 25.27894 |
| reg_ye_2021.lcl | 20.04640 |
| reg_ye_2021.ucl | 30.51149 |
| 0.00000 |
#reduction in covid deaths per 100,000 (in 2021) for every 10% increase in vaccination rate
kable(cbind(reg_ym_vax.beta,reg_ym_vax.lcl,reg_ym_vax.ucl,reg_ym_vax.p/10)[2,]*10)
| x | |
|---|---|
| reg_ym_vax.beta | -7.150849 |
| reg_ym_vax.lcl | -9.528206 |
| reg_ym_vax.ucl | -4.773492 |
| 0.000000 |
| Characteristic | N = 3,1421 |
|---|---|
| Proportion aged 65+ years (%) | 19.8 (4.8) |
| Proportion of males (%) | 50.1 (2.3) |
| Proportion of Asians, Native Hawaiians, or other Pacific Islanders, non-Hispanic (%) | 1.4 (2.9) |
| Proportion of Hispanics (%) | 9.2 (13.7) |
| Proportion of non-Hispanic Whites (%) | 76.2 (20.2) |
| (Missing) | 1 |
| Proportion of non-Hispanic Blacks (%) | 8.9 (14.5) |
| Proportion of American Indians/Alaska Natives, non-Hispanic (%) | 1.8 (7.6) |
| Proportion living in poverty (%) | 14.5 (5.8) |
| (Missing) | 1 |
| Proportion without 4+ years of college education (%) | 78.0 (9.6) |
| Proportion of smokers (%) | 20.5 (4.0) |
| (Missing) | 316 |
| Proportion of adults with diabetes (%) | 8.7 (1.6) |
| (Missing) | 21 |
| Proportion of obese adults (%) | 36.0 (4.0) |
| (Missing) | 316 |
| Proportion of adults with chronic obstructive pulmonary disease (%) | 7.4 (1.8) |
| (Missing) | 21 |
| Proportion of adults with high blood pressure (%) | 32.7 (4.9) |
| (Missing) | 21 |
| Proportion of adults with coronary heart disease (%) | 6.2 (1.0) |
| (Missing) | 21 |
| Proportion of adults with stroke (%) | 3.4 (0.7) |
| (Missing) | 21 |
| Percent fully vaccinated by December 31st, 2021 (%) | 52.4 (12.5) |
| (Missing) | 16 |
| COVID-19 deaths per 100,000 in April-December 2021 | 120.9 (71.8) |
| (Missing) | 9 |
|
1
Mean (SD)
|
|
#Variable selection
set.seed(123)
Y = "deaths_2021"
E = "pctPoverty"
M = c(
"Series_Complete_Pop_Pct"
)
C = c("PropMale",
"PropAbove65",
"no.college.diploma",
"native",
"asian",
"black",
"white",
"hispanic",
"state"
)
d = sdf[c("fips",Y,E,M,C)]
createfmla <- function(yvar, xvars) {
rhs <- paste(xvars, collapse=" + ")
return(as.formula(paste(yvar, "~", rhs, sep=" ")))
}
createint <- function(Y,E,M) {
list <- NULL
for(i in 1:length(M)){
list <- c(list,paste(E,M[i], sep="*"))
}
rhs <- paste(list, collapse=" + ")
return(as.formula(paste(Y, "~", rhs, sep=" ")))
}
set.seed(123)
#create dataset
d_l <- subset(d, state %in% c("AL","AK","AZ","CA","CO","CT","DE",
"FL","GA","HI","ID","IL","IN","DC",
"KS","KY","LA","ME","MD","MA","MI",
"MN","MT","MS","MO","NV","NH","NJ",
"NM","NY","NC","OH","OK","OR","PA",
"RI","SC","TN","TX","VA","VT","WA",
"WV","WI", #lockdown states
"AR","IA","NE","ND","SD","UT","WY" #no lockdown states
))
# max(d_l$Series_Complete_Pop_Pct,na.rm=TRUE)
# quantile(d_l$Series_Complete_Pop_Pct,0.90,na.rm=TRUE)
# quantile(d_l$Series_Complete_Pop_Pct,0.75,na.rm=TRUE)
# quantile(d_l$Series_Complete_Pop_Pct,0.50,na.rm=TRUE)
#impute with RF
set.seed(123)
d_l_mf <- d_l
d_l_mf$state <- factor(d_l_mf$state)
d_l_mf <- subset(d_l_mf, select=-c(fips))
dimp <- missForest(xmis = d_l_mf, ntree=100, maxiter = 10, parallelize = 'no')
# dimp$OOBerror
# dimp$error
d_l_rf<-cbind(d_l$fips, dimp$ximp)
createfmla <- function(yvar, xvars) {
rhs <- paste(xvars, collapse=" + ")
return(as.formula(paste(yvar, "~", rhs, sep=" ")))
}
set.seed(123)
#g-formula approach
#max values
mval.list <- list()
mval.list <- list(max(d_l$Series_Complete_Pop_Pct, na.rm=TRUE))
fit_l <- cmest(
data=d_l_rf, model="gformula",
outcome = Y, exposure = E, mediator = M, basec = C,
yreg="linear", mreg=rep(list("linear"),length(M)),
astar = median(d_l$pctPoverty, na.rm=TRUE), a = median(d_l$pctPoverty, na.rm=TRUE)+10,
EMint=TRUE,
mval = mval.list,
estimation = "imputation", multimp = FALSE,
inference = "bootstrap", nboot = 1000)
res_mediation_max_vax <- data.frame(
"Statistic" = c("Total effect", "Controlled direct effect", "Proportion eliminated"),
"Effect" =
round(c(summary(fit_l)$effect.pe["te"],summary(fit_l)$effect.pe["cde"],summary(fit_l)$effect.pe["pe"]),2),
"Lower CL" =
round(c(summary(fit_l)$effect.ci.low["te"],summary(fit_l)$effect.ci.low["cde"],summary(fit_l)$effect.ci.low["pe"]),2),
"Upper CL" =
round(c(summary(fit_l)$effect.ci.high["te"],summary(fit_l)$effect.ci.high["cde"],summary(fit_l)$effect.ci.high["pe"]),2),
"P-value" =
round(c(summary(fit_l)$effect.pval["te"],summary(fit_l)$effect.pval["cde"],summary(fit_l)$effect.pval["pe"]),3)
)
res_mediation_max_vax %>%
gt() %>%
cols_label(
Statistic = " ",
Lower.CL = "Lower CL",
Upper.CL = "Upper CL",
P.value = "P-value"
) %>% gt::gtsave(
filename = sprintf("%s/tables/mediation_max_vax.html",rootdir))
#90th percentile
mval.list <- list()
mval.list <- list(quantile(d_l$Series_Complete_Pop_Pct,0.90,na.rm=TRUE))
fit_l <- cmest(
data=d_l_rf, model="gformula",
outcome = Y, exposure = E, mediator = M, basec = C,
yreg="linear", mreg=rep(list("linear"),length(M)),
astar = median(d_l$pctPoverty, na.rm=TRUE), a = median(d_l$pctPoverty, na.rm=TRUE)+10,
EMint=TRUE,
mval = mval.list,
estimation = "imputation", multimp = FALSE,
inference = "bootstrap", nboot = 1000)
res_mediation_0.9_vax <- data.frame(
"Statistic" = c("Total effect", "Controlled direct effect", "Proportion eliminated"),
"Effect" =
round(c(summary(fit_l)$effect.pe["te"],summary(fit_l)$effect.pe["cde"],summary(fit_l)$effect.pe["pe"]),2),
"Lower CL" =
round(c(summary(fit_l)$effect.ci.low["te"],summary(fit_l)$effect.ci.low["cde"],summary(fit_l)$effect.ci.low["pe"]),2),
"Upper CL" =
round(c(summary(fit_l)$effect.ci.high["te"],summary(fit_l)$effect.ci.high["cde"],summary(fit_l)$effect.ci.high["pe"]),2),
"P-value" =
round(c(summary(fit_l)$effect.pval["te"],summary(fit_l)$effect.pval["cde"],summary(fit_l)$effect.pval["pe"]),3)
)
res_mediation_0.9_vax %>%
gt() %>%
cols_label(
Statistic = " ",
Lower.CL = "Lower CL",
Upper.CL = "Upper CL",
P.value = "P-value"
) %>% gt::gtsave(
filename = sprintf("%s/tables/mediation_0.9_vax.html",rootdir))
#75th percentile
mval.list <- list()
mval.list <- list(quantile(d_l$Series_Complete_Pop_Pct,0.75,na.rm=TRUE))
fit_l <- cmest(
data=d_l_rf, model="gformula",
outcome = Y, exposure = E, mediator = M, basec = C,
yreg="linear", mreg=rep(list("linear"),length(M)),
astar = median(d_l$pctPoverty, na.rm=TRUE), a = median(d_l$pctPoverty, na.rm=TRUE)+10,
EMint=TRUE,
mval = mval.list,
estimation = "imputation", multimp = FALSE,
inference = "bootstrap", nboot = 1000)
res_mediation_0.75_vax <- data.frame(
"Statistic" = c("Total effect", "Controlled direct effect", "Proportion eliminated"),
"Effect" =
round(c(summary(fit_l)$effect.pe["te"],summary(fit_l)$effect.pe["cde"],summary(fit_l)$effect.pe["pe"]),2),
"Lower CL" =
round(c(summary(fit_l)$effect.ci.low["te"],summary(fit_l)$effect.ci.low["cde"],summary(fit_l)$effect.ci.low["pe"]),2),
"Upper CL" =
round(c(summary(fit_l)$effect.ci.high["te"],summary(fit_l)$effect.ci.high["cde"],summary(fit_l)$effect.ci.high["pe"]),2),
"P-value" =
round(c(summary(fit_l)$effect.pval["te"],summary(fit_l)$effect.pval["cde"],summary(fit_l)$effect.pval["pe"]),3)
)
res_mediation_0.75_vax %>%
gt() %>%
cols_label(
Statistic = " ",
Lower.CL = "Lower CL",
Upper.CL = "Upper CL",
P.value = "P-value"
) %>% gt::gtsave(
filename = sprintf("%s/tables/mediation_0.75_vax.html",rootdir))
#Vaccination rate set to max
knitr::kable(res_mediation_max_vax)
| Statistic | Effect | Lower.CL | Upper.CL | P.value | |
|---|---|---|---|---|---|
| te | Total effect | 25.43 | 20.29 | 30.51 | 0.00 |
| cde | Controlled direct effect | 4.99 | -6.87 | 17.56 | 0.39 |
| pe | Proportion eliminated | 0.80 | 0.34 | 1.26 | 0.00 |
#Vaccination rate set to 90th percentile
knitr::kable(res_mediation_0.9_vax)
| Statistic | Effect | Lower.CL | Upper.CL | P.value | |
|---|---|---|---|---|---|
| te | Total effect | 25.40 | 20.49 | 31.07 | 0 |
| cde | Controlled direct effect | 15.94 | 10.10 | 21.74 | 0 |
| pe | Proportion eliminated | 0.37 | 0.19 | 0.57 | 0 |
#Vaccination rate set to 75th percentile
knitr::kable(res_mediation_0.75_vax)
| Statistic | Effect | Lower.CL | Upper.CL | P.value | |
|---|---|---|---|---|---|
| te | Total effect | 25.39 | 20.30 | 31.81 | 0 |
| cde | Controlled direct effect | 19.95 | 14.96 | 25.80 | 0 |
| pe | Proportion eliminated | 0.21 | 0.11 | 0.32 | 0 |
###for each vax, obtain PE and risk reduction in COVID-19 deaths
#Variable selection
set.seed(123)
Y = "deaths_2021"
E = "pctPoverty"
M = c(
"Series_Complete_Pop_Pct"
)
C = c("PropMale",
"PropAbove65",
"no.college.diploma",
"native",
"asian",
"black",
"white",
"hispanic",
"state"
)
d = sdf[c("fips",Y,E,M,C)]
createfmla <- function(yvar, xvars) {
rhs <- paste(xvars, collapse=" + ")
return(as.formula(paste(yvar, "~", rhs, sep=" ")))
}
createint <- function(Y,E,M) {
list <- NULL
for(i in 1:length(M)){
list <- c(list,paste(E,M[i], sep="*"))
}
rhs <- paste(list, collapse=" + ")
return(as.formula(paste(Y, "~", rhs, sep=" ")))
}
set.seed(123)
#create dataset
d_l <- subset(d, state %in% c("AL","AK","AZ","CA","CO","CT","DE",
"FL","GA","HI","ID","IL","IN","DC",
"KS","KY","LA","ME","MD","MA","MI",
"MN","MT","MS","MO","NV","NH","NJ",
"NM","NY","NC","OH","OK","OR","PA",
"RI","SC","TN","TX","VA","VT","WA",
"WV","WI", #lockdown states
"AR","IA","NE","ND","SD","UT","WY" #no lockdown states
))
#impute with RF
set.seed(123)
d_l_mf <- d_l
d_l_mf$state <- factor(d_l_mf$state)
d_l_mf <- subset(d_l_mf, select=-c(fips))
dimp <- missForest(xmis = d_l_mf, ntree=100, maxiter = 10, parallelize = 'no')
# dimp$OOBerror
# dimp$error
d_l_rf<-cbind(d_l$fips, dimp$ximp)
vax <- NULL
pe <- NULL
rd <- NULL
median_vax = median(d_l$Series_Complete_Pop_Pct, na.rm=TRUE)
max_vax = max(d_l$Series_Complete_Pop_Pct, na.rm=TRUE)
plot_vax <- NULL
n <- 1
for (j in seq(median_vax,max_vax,by=1)){
mval.list_combined <- list()
mval.list_combined <- list(j)
fit_l_combined <- cmest(
data=d_l_rf, model="gformula",
outcome = Y, exposure = E, mediator = M, basec = C,
yreg="linear", mreg=rep(list("linear"),length(M)),
astar = median(d_l$pctPoverty, na.rm=TRUE), a = median(d_l$pctPoverty, na.rm=TRUE)+10,
EMint=TRUE,
mval = mval.list_combined,
estimation = "imputation", multimp = FALSE,
inference = "bootstrap", nboot = 1)
vax[n] = j
pe[n] = summary(fit_l_combined)[9]$effect.pe["pe"] * 100
#create regression with E, M, C
reg = glm(deaths_2021 ~ Series_Complete_Pop_Pct + pctPoverty + pctPoverty*Series_Complete_Pop_Pct +
PropMale +
PropAbove65 +
no.college.diploma +
native +
asian +
black +
white +
hispanic +
state,
family = gaussian(link="identity"), data = d_l_rf)
d_l_mmax <- d_l_rf
d_l_mmax$Series_Complete_Pop_Pct <- j
#risk reduction (how many fewer deaths per 100,000 population with the specified mediator values?)
rd[n] = -(mean(predict(reg, newdata=d_l_mmax)) - mean(d_l_mmax$deaths_2021))
n <- n+1
}
#max risk reduction
-(mean(predict(reg, newdata=d_l_mmax)) - mean(d_l_mmax$deaths_2021))
t.test(d_l_mmax$deaths_2021, predict(reg, newdata=d_l_mmax), paired=TRUE)
plot_vax <- data.frame(pe, rd, vax)
fig_vax = ggplot(data = plot_vax, aes(x = pe,y = rd, color = vax)) +
geom_point(size = 3) +
scale_x_continuous("Proportion eliminated of the poverty-COVID-19 death rate association (%)") +
scale_y_continuous("Average reduction in COVID-19 deaths per 100,000") +
scale_color_gradient(low="blue", high="red") +
labs(color="Vaccination \nrate (%)") +
theme(legend.position = c(0.8, 0.3),legend.background = element_rect(fill="white",linetype="solid", color ="black"))
#Variable selection
set.seed(123)
Y = "deaths_2021"
E = "pctPoverty"
M = c(
"Series_Complete_Pop_Pct"
)
C = c("PropMale",
"PropAbove65",
"no.college.diploma",
"native",
"asian",
"black",
"white",
"hispanic",
"state"
)
d = sdf[c("fips",Y,E,M,C)]
createfmla <- function(yvar, xvars) {
rhs <- paste(xvars, collapse=" + ")
return(as.formula(paste(yvar, "~", rhs, sep=" ")))
}
createint <- function(Y,E,M) {
list <- NULL
for(i in 1:length(M)){
list <- c(list,paste(E,M[i], sep="*"))
}
rhs <- paste(list, collapse=" + ")
return(as.formula(paste(Y, "~", rhs, sep=" ")))
}
set.seed(123)
#create dataset
d_l <- subset(d, state %in% c("AL","AK","AZ","CA","CO","CT","DE",
"FL","GA","HI","ID","IL","IN","DC",
"KS","KY","LA","ME","MD","MA","MI",
"MN","MT","MS","MO","NV","NH","NJ",
"NM","NY","NC","OH","OK","OR","PA",
"RI","SC","TN","TX","VA","VT","WA",
"WV","WI", #lockdown states
"AR","IA","NE","ND","SD","UT","WY" #no lockdown states
))
#impute with RF
set.seed(123)
d_l_mf <- d_l
d_l_mf$state <- factor(d_l_mf$state)
d_l_mf <- subset(d_l_mf, select=-c(fips))
dimp <- missForest(xmis = d_l_mf, ntree=100, maxiter = 10, parallelize = 'no')
# dimp$OOBerror
# dimp$error
d_l_rf<-cbind(d_l$fips, dimp$ximp)
#the original association
reg = glm(createfmla(Y, E), family = gaussian(link="identity"), data = d_l_rf)
summary(reg)
#create regression with E, M, C, and predict Y with max vax
#with E-M interaction
reg = glm(deaths_2021 ~ pctPoverty + Series_Complete_Pop_Pct + pctPoverty*Series_Complete_Pop_Pct +
PropMale +
PropAbove65 +
no.college.diploma +
native +
asian +
black +
white +
hispanic +
state,
family = gaussian(link="identity"), data = d_l_rf)
d_l_mmax <- d_l_rf
d_l_mmax$Series_Complete_Pop_Pct <- max(d_l$Series_Complete_Pop_Pct,na.rm=TRUE)
d_l_mmax[Y] <- predict(reg, newdata=d_l_mmax)
#Regress with predicted Y
reg = glm(createfmla(Y, E), family = gaussian(link="identity"), data = d_l_mmax)
summary(reg)
#add to scatterplot
fig <- ggpubr::ggscatter(data=d_l, x="pctPoverty",y="deaths_2021", fill="grey45", color="grey45",
xlab = "Poverty rate (%)", ylab = "COVID-19 deaths per 100,000",
add = "reg.line", #conf.int = TRUE,
#cor.coef = TRUE,
cor.method = "pearson",
add.params = list(color = "Black",fill = "Black")
)
scatterplot_vax =
fig +
geom_segment(aes(x = min(d_l$pctPoverty, na.rm=TRUE), xend = max(d_l$pctPoverty, na.rm=TRUE),
y = coef(reg)["(Intercept)"] + coef(reg)["pctPoverty"]*min(d_l$pctPoverty, na.rm=TRUE),
yend = coef(reg)["(Intercept)"] + coef(reg)["pctPoverty"]*max(d_l$pctPoverty, na.rm=TRUE)),
color="red", lwd=1) +
xlim(min(d_l$pctPoverty, na.rm=TRUE), max(d_l$pctPoverty, na.rm=TRUE)) +
annotate(geom="text", x=45, y=110, label="Vaccination rate \nmaximized", color="red") +
annotate(geom="text", x=45, y=290, label="Observed \nvaccination rate", color="Black")
pdf(sprintf("%s/figures/southwest_vax.pdf",rootdir), width=6, height=10)
ggarrange(fig_vax, scatterplot_vax, ncol = 1, labels = c("A", "B"))
## `geom_smooth()` using formula 'y ~ x'
dev.off()
ggarrange(fig_vax, scatterplot_vax, ncol = 1, labels = c("A", "B"))
## `geom_smooth()` using formula 'y ~ x'